package server;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Extracts the {@code href} targets of {@code <a>} and {@code <link>} elements
 * from an HTML document and turns root-relative targets into absolute URLs.
 */
public class HTMLParser {

	/**
	 * Minimum length of the array returned by {@link #parseHTML(String, String)}.
	 * Kept so callers that rely on the historical fixed-size, null-padded
	 * result keep working.
	 */
	private static final int MIN_RESULT_LENGTH = 2000;

	/**
	 * Collects every {@code href} of the {@code a[href]} elements followed by
	 * every {@code href} of the {@code link[href]} elements found in
	 * {@code html}.
	 *
	 * <p>Targets starting with a single {@code '/'} are prefixed with the
	 * scheme and host taken from {@code url}; targets starting with
	 * {@code "//"} (protocol-relative) only receive the scheme. All other
	 * targets are returned exactly as written in the document. Empty
	 * {@code href} values are skipped because they name no target.
	 *
	 * @param html the HTML text to scan
	 * @param url  the address the document came from; a missing scheme is
	 *             treated as {@code http://}, and {@code https://} is preserved.
	 *             If no host can be extracted (null or empty value), targets
	 *             are returned unresolved.
	 * @return an array holding the links in document order (anchors first,
	 *         then link elements). Its length is at least 2000 and it grows
	 *         when the page has more links; unused trailing slots are
	 *         {@code null}, so callers should stop at the first {@code null}
	 *         or at the end of the array.
	 */
	public String[] parseHTML(String html, String url)
	{
		Document doc = Jsoup.parse(html);
		Elements alinks = doc.select("a[href]");
		Elements llinks = doc.select("link[href]");

		String origin = originOf(url);

		// Size the result from the actual match count so that pages with more
		// than MIN_RESULT_LENGTH links no longer overflow the array.
		int capacity = Math.max(MIN_RESULT_LENGTH, alinks.size() + llinks.size());
		String[] linkList = new String[capacity];

		int count = 0;
		count = collect(alinks, origin, linkList, count);
		collect(llinks, origin, linkList, count);

		return linkList;
	}

	/**
	 * Appends the resolved, non-empty href of each element to {@code out}
	 * starting at {@code start} and returns the next free index.
	 */
	private static int collect(Elements elements, String origin, String[] out, int start)
	{
		int next = start;
		for (Element element : elements) {
			String href = element.attr("href");
			if (href.isEmpty()) {
				// An empty href has no first character to inspect and no target to report.
				continue;
			}
			out[next] = resolve(href, origin);
			next++;
		}
		return next;
	}

	/**
	 * Makes a root-relative or protocol-relative href absolute against
	 * {@code origin}; any other href is returned unchanged.
	 */
	private static String resolve(String href, String origin)
	{
		if (origin == null || href.charAt(0) != '/') {
			return href;
		}
		if (href.startsWith("//")) {
			// Protocol-relative reference: it already names its own host,
			// so only the scheme ("http:" / "https:") is borrowed.
			return origin.substring(0, origin.indexOf(':') + 1) + href;
		}
		return origin + href;
	}

	/**
	 * Reduces {@code url} to {@code scheme://host}, defaulting the scheme to
	 * http when none is given. Returns {@code null} when no host is present.
	 */
	private static String originOf(String url)
	{
		if (url == null) {
			return null;
		}

		String scheme = "http";
		String rest = url;
		if (url.regionMatches(true, 0, "https://", 0, 8)) {
			scheme = "https";
			rest = url.substring(8);
		} else if (url.regionMatches(true, 0, "http://", 0, 7)) {
			rest = url.substring(7);
		}

		// The host part ends at the first path, query or fragment delimiter.
		int end = rest.length();
		for (int i = 0; i < rest.length(); i++) {
			char c = rest.charAt(i);
			if (c == '/' || c == '?' || c == '#') {
				end = i;
				break;
			}
		}

		String host = rest.substring(0, end);
		return host.isEmpty() ? null : scheme + "://" + host;
	}

}
